START

Goal/Purpose of operations: 
The DepMap/PRISM’s primary  pooled drug screens were used to help evaluate if a candidate could be a suitable candidate (if that drug was tested). The primary screen calculated the median of log fold change median fluorescence intensity between replicates of a cell line treated with a drug. The PRISM study considered a cell line as sensitive to a treatment if the median-collapsed fold-change is less than 0.3. 

Finished psedocode on: 
220503

System which operations were done on:
my laptop

GitHub Repo:
Transfer_Learning_R03

Docker:
rstudio_cancer_dr

Directory of operations:
/home - docker

Scripts being edited for operations: 
NA

Data being used: 
PRISM/DEPMAP data downloaded from depmap.org data explore tool. 220503- download data
/output/TF_L_GBM/220503_PRISM_DEPMAP_candidate_data/

Papers and tools:
NA

STEPS

Set working directory

load in data

primary_gbm_res <- read.csv(file= "~/output/220808_prism_gbm_candidates.csv" )
library(readr)
cell_gbm_info<- read_csv("~/data/DEPMAP_PRISM_220228/cell-line-selector_gbm_top_ten.csv")
## Rows: 61 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): lineage3, depmapId, displayName, Tumor Type, Gender, Microsatellit...
## dbl (23): temozolomide Drug sensitivity (PRISM Repurposing Primary Screen) 1...
## lgl  (1): LongTable-checkbox
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

remove the odd names

cell_gbm_info<- cell_gbm_info[, -c(1:3, 5)]
library(tidyr)
cell_gbm_info_longer <- pivot_longer(cell_gbm_info, c(2:12), names_to = "drug", values_to = "log2change")

removing the extra info in the drug stuff

cell_gbm_info_longer$drug <- sub(" .*", "", cell_gbm_info_longer$drug)

remove NAs from the logfold change data

cell_gbm_info_longer_v2 <- cell_gbm_info_longer[ ! is.na(cell_gbm_info_longer$log2change),]

Analysis

library(ggplot2)
library(cowplot)
library(viridis)
## Loading required package: viridisLite
library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
p2 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, y= drug, color= drug))+ geom_point(stat = "identity") +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) 

p1 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, fill= drug)) + geom_density(alpha=.5) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)

plot_grid(p1, p2, ncol = 1, align = "v")

cell_gbm_info_longer_v2$tmz_cell<- ifelse(cell_gbm_info_longer_v2$displayName %in% c("A172", "GB1","KNS81", "SF295", "YH13", "YKG1"), "TMZ_RES", "TMZ_Sen")
gbm_candidate_prism_plot_tmz <- function(cell_line_info, drug){
  
  cell_gbm_info_longer_v2<- cell_line_info[cell_line_info$drug %in% c(drug, "temozolomide"),]
    
  #cell_gbm_info_longer_v2$shape <- ifelse(cell_gbm_info_longer_v2$tmz_cell== "TMZ_RES", 19, 17)
  cell_gbm_info_longer_v2$tmz_cell<- factor(cell_gbm_info_longer_v2$tmz_cell, levels= c("TMZ_Sen", "TMZ_RES" ))
  p2 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, y= displayName, color= drug, shape= tmz_cell))+ geom_point(stat = "identity", size = 4) +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) + theme(text = element_text(size = 12,  face="bold"))

  p1 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, fill= drug)) + geom_density(alpha=.5) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3) + theme(text = element_text(size = 12,  face="bold"))

  plot_grid(p1+ scale_fill_manual( values=c("#440154FF","#228C8DFF")), p2+ scale_color_manual( values= c("#440154FF","#228C8DFF") ), ncol = 1, align = "v") 
}
gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "simvastatin")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "pamidronate")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "floxuridine")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "nimodipine")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "vardenafil")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "moxifloxacin")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "diltiazem")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "diflunisal")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "saxagliptin")

gbm_candidate_prism_plot_tmz(cell_gbm_info_longer_v2, "icosapent")

want to determine if there is a difference between groups and drug repesonse

#mgmt methylation
cell_gbm_info_longer_v2$MGMT_METH<-  ifelse(cell_gbm_info_longer_v2$`MGMT Methylation (1kb upstream TSS) MGMT_10_131264504_131265504` >0.5, "Hyper methylation (>0.5)", "Hypo methylation (=< 0.5)")

drugs<- unique(cell_gbm_info_longer_v2$drug)
mgmt<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$MGMT_METH),]
  #groups<- 
  test<- as.vector(as.data.frame(cell_line_info[,18]))
    x<- test[cell_line_info$MGMT_METH == "Hyper methylation (>0.5)",1]
    y<- test[!cell_line_info$MGMT_METH == "Hyper methylation (>0.5)",1]
    x<- as.numeric(unlist(x))
    y<- as.numeric(unlist(y))
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided") 
       mgmt[i]<- res$p.value
    }
}
#egfr COPY NUMBER
cell_gbm_info_longer_v2$EGFR_copy <- ifelse(cell_gbm_info_longer_v2$`EGFR (ERBB1, ERBB) Copy Number 22Q2 Public` >2, "Copy Number > 2", "Copy Number =< 2")

egfr_amp<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$EGFR_copy ),]
  #groups<- 
  test<- as.vector(as.data.frame(cell_line_info[,18]))
    x<- test[cell_line_info$EGFR_copy  == "Copy Number > 2",1]
    y<- test[!cell_line_info$EGFR_copy  == "Copy Number > 2",1]
    x<- as.numeric(unlist(x))
    y<- as.numeric(unlist(y))
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided") 
       egfr_amp[i]<- res$p.value
    }else{
      print(i)
    }
    
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
#note most of the gbm cell lines have no egfr amplication
#table(cell_gbm_info_longer_v2$Gender)
sex<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$Gender ),]
  #groups<- 
  test<- as.vector(as.data.frame(cell_line_info[,18]))
    x<- test[cell_line_info$Gender  == "Male",1]
    y<- test[!cell_line_info$Gender  == "Male",1]
    x<- as.numeric(unlist(x))
    y<- as.numeric(unlist(y))
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided") 
       sex[i]<- res$p.value
    }else{
      print(i)
    }
    
}
#tmz non-senssitive cell lines (0.3=> logfoldchange in PRISM)
#A172
#GB1
#KNS81
#SF295
#YH13
#YKG1

cell_gbm_info_longer_v2$tmz_cell<- ifelse(cell_gbm_info_longer_v2$displayName %in% c("A172", "GB1","KNS81", "SF295", "YH13", "YKG1"), "TMZ_RES", "TMZ_Sen")

tmz<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$tmz_cell ),]
  #groups<- 
  test<- as.vector(as.data.frame(cell_line_info[,18]))
    x<- test[cell_line_info$tmz_cell  == "TMZ_RES",1]
    y<- test[!cell_line_info$tmz_cell  == "TMZ_RES",1]
    x<- as.numeric(unlist(x))
    y<- as.numeric(unlist(y))
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided") 
       tmz[i]<- res$p.value
    }else{
      print(i)
    }
    
}
drugs<- unique(cell_gbm_info_longer_v2$drug)
index<- c(3,4,10,11,12)
test_res<- matrix(nrow= 11, ncol= 5)
for( i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  for(j in 1:5){
    test<- cell_line_info[,c(index[j], 18)]
    groups<- test[,1]== 1
    x<- test[groups,2]
    y<- test[!groups,2]
    x<- as.numeric(unlist(x))
    y<- as.numeric(unlist(y))
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided") 
      test_res[i,j]<- res$p.value
    }else{
      res<- NA
      test_res[i,j]<-res
    }
  }
} 
rownames(test_res)<- drugs
colnames(test_res)<- colnames(cell_gbm_info_longer_v2)[index]
test_res<- cbind(test_res, sex, tmz,  mgmt)
test_res_filter_1<- test_res< 0.05
#p-value for the MGMT promotor
test_res_filter_2<- test_res< 0.16795666
gbm_candidate_prism_plot_two_groups <- function(cell_line_info, drug, info_index, divider= 0, group1="Normal", group2= "Mutation" ){
  
  #index_col <- grepl(colnames(cell_line_info), info)
  cell_gbm_info_longer_v2<- cell_line_info[cell_line_info$drug == drug,]
  
  a <- ifelse(cell_gbm_info_longer_v2[,info_index] ==divider , 19, 17)
  cell_gbm_info_longer_v2$test <- ifelse(cell_gbm_info_longer_v2[,info_index] ==divider , group1, group2)
  #print(a)
  #nam <- colnames(cell_gbm_info_longer_v2)[info_index]
  #cell_gbm_info_longer_v2[,info_index]<- as.factor(cell_gbm_info_longer_v2[,info_index])
  #print(cell_gbm_info_longer_v2[,info_index])
  p2 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, y= displayName, color= test))+ geom_point(stat = "identity", shape = a, size = 4) +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) 
  #+ theme(axis.text.y = element_text( color = a))

  p1 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, fill= test)) + geom_density(alpha=.5) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)

  plot_grid(p1+ scale_fill_manual( values=c("#440154FF","#228C8DFF")), p2+ scale_color_manual( values= c("#440154FF","#228C8DFF") ), ncol = 1, align = "v")
  
}
#pten pamidronate
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "pamidronate", 12 )

#Sex
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "vardenafil", 14, divider= "Male", group1="Male", group2= "Female")

cell_gbm_info_longer_v2<- cell_gbm_info_longer_v2[,c(1:18, 20,21, 19)]

different p-value threshold because of low poer. DO NOT TRUST THIS DATA

#"EGFR_copy"
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "diflunisal", 21, divider= "TMZ_RES", group1="TMZ Resistant", group2= "TMZ Sensitive")

if they are reistant to tmz it looks reistant here.

gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "icosapent", 21, divider= "TMZ_RES", group1="TMZ Resistant", group2= "TMZ Sensitive")

gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "floxuridine", 19, divider= "Hyper methylation (>0.5)", group1="Hyper methylation (>0.5)", group2= "Hypo methylation (=< 0.5)")
## Warning: Removed 4 rows containing missing values (geom_point).

gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "moxifloxacin", 14, divider= "Male", group1="Male", group2= "Female")

gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "icosapent", 14, divider= "Male", group1="Male", group2= "Female")

#pten 
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "moxifloxacin", 12 )

#"TP53 (LFS1, p53) Mutation 22Q2 Public
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "pamidronate", 3 )

#"TP53 (LFS1, p53) Mutation 22Q2 Public
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "floxuridine", 3 )

#"RB1 (PPP1R130, RB, OSRC) Mutation 22Q2 Public" 
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "moxifloxacin", 10 )

#"RB1 (PPP1R130, RB, OSRC) Mutation 22Q2 Public" 
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "moxifloxacin", 10 )

#"MTOR (FRAP1, FLJ44809, RAPT1, RAFT1, FRAP, FRAP2) Mutation 22Q2 Public"   
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "moxifloxacin", 11 )

#"MTOR (FRAP1, FLJ44809, RAPT1, RAFT1, FRAP, FRAP2) Mutation 22Q2 Public"   
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "pamidronate", 11 )

#"MTOR (FRAP1, FLJ44809, RAPT1, RAFT1, FRAP, FRAP2) Mutation 22Q2 Public"   
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "temozolomide", 11 )

#"MTOR (FRAP1, FLJ44809, RAPT1, RAFT1, FRAP, FRAP2) Mutation 22Q2 Public"   
gbm_candidate_prism_plot_two_groups(cell_gbm_info_longer_v2, "simvastatin", 11 )

linear regression stuff with methylation of MGMT and efgr amplication

egfr_copy_p<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$`EGFR (ERBB1, ERBB) Copy Number 22Q2 Public` ),]
  #groups<- 
  test<- as.data.frame(cell_line_info[,c(5, 18)])
  res <- lm(log2change ~ `EGFR (ERBB1, ERBB) Copy Number 22Q2 Public`, test)
  egfr_copy_p[i]<- summary(res)$coefficients[,4][2] 
  
}
mgmt_methy_p<- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug == drugs[i],]
  #test<- cell_line_info[,c(index[j], 18)]
  cell_line_info<- cell_line_info[!is.na(cell_line_info$`MGMT Methylation (1kb upstream TSS) MGMT_10_131264504_131265504` ),]
  #groups<- 
  test<- as.data.frame(cell_line_info[,c(2, 18)])
  res <- lm(log2change ~ `MGMT Methylation (1kb upstream TSS) MGMT_10_131264504_131265504`, test)
  mgmt_methy_p[i]<- summary(res)$coefficients[,4][2] 
  
}
test_res <- cbind(test_res, egfr_copy_p, mgmt_methy_p)
gbm_candidate_prism_plot_cont <- function(cell_line_info, drug, info_index, title ){
  
  #index_col <- grepl(colnames(cell_line_info), info)
  cell_gbm_info_longer_v2<- cell_line_info[cell_line_info$drug == drug,]


  nam <- colnames(cell_gbm_info_longer_v2)[info_index]
  #cell_gbm_info_longer_v2[,info_index]<- as.factor(cell_gbm_info_longer_v2[,info_index])
  #print(cell_gbm_info_longer_v2[,info_index])
  p2 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change, y= displayName, color= get(nam)))+ geom_point(stat = "identity", size= 4) +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3)+ labs(color=title) 
  #+ theme(axis.text.y = element_text( color = a))

  p1 <- ggplot(cell_gbm_info_longer_v2, aes(x=log2change)) + geom_density(alpha=.5) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)
  
  legend <- get_legend(p2)
  p2<- p2 + theme(legend.position='none')
  ggdraw(plot_grid(plot_grid(p1, p2, ncol=1, align='v'),
                 plot_grid(NULL, legend, ncol=1),
                 rel_widths=c(1, 0.2)))
  
}
gbm_candidate_prism_plot_cont(cell_gbm_info_longer_v2, "moxifloxacin", 2, "MGMT Promoter Methylation(0-1)" )

gbm_candidate_prism_plot_cont(cell_gbm_info_longer_v2, "floxuridine", 2, "MGMT Promoter Methylation(0-1)" )

gbm_candidate_prism_plot_cont(cell_gbm_info_longer_v2, "floxuridine", 5, "EGFR Copy Number" )

gbm_candidate_prism_plot_cont(cell_gbm_info_longer_v2, "moxifloxacin", 5, "EGFR Copy Number" )

gbm_candidate_prism_plot_cont(cell_gbm_info_longer_v2, "nimodipine", 5, "EGFR Copy Number" )

change the tmz resistant test compare the log2fold changes of the resistnat cells lines between drugs not within a drug.

cell_gbm_info_longer_v3<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$tmz_cell =="TMZ_RES", ]

tmz_v2 <- c()
for (i in 1:length(drugs)){
  cell_line_info<- cell_gbm_info_longer_v3[cell_gbm_info_longer_v3$drug %in% c(drugs[i], "temozolomide"),]
   #simvastain does have the A172 cell line so 
    # only keep cell lines with duplicated
  paired_cell_lines<- cell_line_info$displayName[duplicated(cell_line_info$displayName)]
  cell_line_info<- cell_line_info[cell_line_info$displayName %in% paired_cell_lines,]
  test<- as.vector(as.data.frame(cell_line_info[,17:18]))
  x<- test[test$drug  == drugs[i],2]
  y<- test[!test$drug ==  drugs[i],2]
  x<- as.numeric(unlist(x))
  y<- as.numeric(unlist(y))
   
    if(length(x) > 1 & length(y) > 1 ){
      res <- wilcox.test(x,y,alternative = "two.sided", paired = TRUE) 
       tmz_v2[i]<- res$p.value
    }else{
      print(i)
    }
    
}
## [1] 1
#replace the old tmz resistant p-values with the new p-values
colnames(test_res)
##  [1] "TP53 (LFS1, p53) Mutation 22Q2 Public"                                 
##  [2] "EGFR (ERBB1, ERBB) Mutation 22Q2 Public"                               
##  [3] "RB1 (PPP1R130, RB, OSRC) Mutation 22Q2 Public"                         
##  [4] "MTOR (FRAP1, FLJ44809, RAPT1, RAFT1, FRAP, FRAP2) Mutation 22Q2 Public"
##  [5] "PTEN (MHAM, PTEN1, MMAC1, BZS, TEP1) Mutation 22Q2 Public"             
##  [6] "sex"                                                                   
##  [7] "tmz"                                                                   
##  [8] "mgmt"                                                                  
##  [9] "egfr_copy_p"                                                           
## [10] "mgmt_methy_p"
tmz_v2[1]<- 1 # for temzolomide
test_res[,7]<- tmz_v2
#plot.data$stars <- cut(plot.data$p.value, breaks=c(-Inf, 0.001, 0.01, 0.05, Inf), label=c("***", "**", "*", "")) 
test_res_t <- t(test_res)
rownames(test_res_t)<- c("TP53 Mutation - Wilcox", "EGFR Mutation - Wilcox" , "RB1 Mutation - Wilcox", "MTOR Mutation - Wilcox", "PTEN Mutation - Wilcox", "Sex - Wilcox", "TMZ-Resistant - Wilcox Paired", "MGMT Promoter Methylation (high vs low) - Wilcox", "EGFR Copy Number - linear regression", "MGMT Promoter Methylation- linear regression"  )                                         
col_fun = colorRamp2(c(0,  1), c("blue", "white"))
Heatmap(test_res_t, nam= "p-value of biomarker and PRISM Drug Response", col = col_fun, column_names_rot = 45,
               clustering_distance_rows= "euclidean",
               clustering_distance_columns=  "euclidean",
               clustering_method_rows = "ward.D2" ,
               clustering_method_columns="ward.D2", 
        cell_fun = function(j, i, x, y, w, h, fill) {
        if(test_res_t[i, j] < 0.05) {
            grid.text("*", x, y)
        } else if(test_res_t[i, j] < 0.2) {
          grid.text("?", x, y)
      }})

top 5

cell_gbm_info_longer_v3<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug %in% c("pamidronate", "nimodipine", "moxifloxacin", "saxagliptin", "icosapent", "temozolomide"),]
p2 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, y= drug, color= drug))+ geom_point(stat = "identity") +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) 

p1 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, fill= drug)) + geom_density(alpha=.2) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)

plot_grid(p1, p2, ncol = 1, align = "v")

cell_gbm_info_longer_v3<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug %in% c("pamidronate", "nimodipine","temozolomide"),]
cell_gbm_info_longer_v3$drug<- factor(cell_gbm_info_longer_v3$drug , levels= c("pamidronate", "temozolomide", "nimodipine"))
p2 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, y= displayName, color= drug))+ geom_point(stat = "identity", size= 4) +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) + theme(text = element_text(size = 20,  face="bold"))+ scale_color_manual(values = c("#F1605DFF" ,"gold","#721F81FF"))

p1 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, fill= drug)) + geom_density(alpha=.8) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)+ theme(text = element_text(size = 20,  face="bold")) + scale_fill_manual(values = c("#F1605DFF" ,"gold","#721F81FF"))

plot_grid(p1, p2, ncol = 1, align = "v")

cell_gbm_info_longer_v3<- cell_gbm_info_longer_v2[cell_gbm_info_longer_v2$drug %in% c("saxagliptin", "icosapent","temozolomide"),]
cell_gbm_info_longer_v3$drug<- factor(cell_gbm_info_longer_v3$drug , levels= c("saxagliptin", "temozolomide", "icosapent"))
p2 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, y= displayName, color= drug))+ geom_point(stat = "identity", size= 4) +theme_minimal() + xlab("PRISM Primary Screen log(fold change)") + ylab("GBM Cell Lines") + geom_vline(xintercept = 0.3) + theme(text = element_text(size = 20,  face="bold"))+ scale_color_brewer(palette = "Set2")

p1 <- ggplot(cell_gbm_info_longer_v3, aes(x=log2change, fill= drug)) + geom_density(alpha=.8) +theme_minimal() +  theme(
    axis.title.x = element_blank())+ geom_vline(xintercept = 0.3)+ theme(text = element_text(size = 20,  face="bold")) + scale_fill_brewer(palette = "Set2")

plot_grid(p1, p2, ncol = 1, align = "v")

make adjustment to the alluvial plot to plot with FDA approved or not

download the compound info from LINCS

source("/home/rstudio/script/functions_cancer_signature_reversion_JLF.R")
library(ggalluvial)
library(stringr)
library(matrixStats)
lincs_commpound_info <- read.delim("~/data/LINCS_210914_LEVEL3/compoundinfo_beta.txt")

gbm

lincs_limma_gbm_res_GI1_fda_approved<- read_csv( "~/output/limma_gbm/220808_SR_LINCS_GBM_top_RES_all.csv")
## New names:
## Rows: 15 Columns: 17
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): pert, cell, type, trend, t_gn_sym dbl (10): ...1, PCID, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_down... lgl (2): fda_database, GBM_CT
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
lincs_deseq2_gbm_res_GI1_fda_approved <- read_csv( "~/output/deseq2_gbm/220928_SR_LINCS_GBM_DESEQ2_RES_all.csv")
## New names:
## Rows: 13 Columns: 17
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): pert, cell, type, trend, t_gn_sym dbl (10): ...1, PCID, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_down... lgl (2): FDA, GBM_CT
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
lincs_tfl_gbm_res_GI1_fda_approved <- read_csv( "~/output/TF_L_GBM/220808_SR_LINCS_GBM_top_RES_all.csv")
## New names:
## Rows: 17 Columns: 17
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): pert, cell, type, trend, t_gn_sym dbl (10): ...1, PCID, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_down... lgl (2): fda_approval,
## clinical_trial_GBM
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
drug_moa_target_plotting_ct_info(lincs_tfl_gbm_res_GI1_fda_approved ,lincs_commpound_info=lincs_commpound_info, "TLF", "GBM", "~/output/candidate_moa_target_plots/221117" )
## Warning: Ignoring unknown parameters: face
## Ignoring unknown parameters: face
#for limma and deseq2 there is inverse in the color for the plots going to adjust here via !
lincs_limma_gbm_res_GI1_fda_approved$GBM_CT<- !lincs_limma_gbm_res_GI1_fda_approved$GBM_CT
drug_moa_target_plotting_ct_info(lincs_limma_gbm_res_GI1_fda_approved ,lincs_commpound_info=lincs_commpound_info, "limma", "GBM", "~/output/candidate_moa_target_plots/221117" )
## Warning: Ignoring unknown parameters: face
## Ignoring unknown parameters: face
lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT<- !lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT
drug_moa_target_plotting_ct_info(lincs_deseq2_gbm_res_GI1_fda_approved ,lincs_commpound_info=lincs_commpound_info, "deseq2", "GBM", "~/output/candidate_moa_target_plots/221117" )
## Warning: Ignoring unknown parameters: face
## Ignoring unknown parameters: face
#fix the logical after plotting
lincs_limma_gbm_res_GI1_fda_approved$GBM_CT<- !lincs_limma_gbm_res_GI1_fda_approved$GBM_CT
lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT<- !lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT

PRISM plotting

prism_gbm_cell_lines <- read.csv("~/output/220808_prism_gbm_candidates.csv")
non_fda_candidate_list<- c(lincs_tfl_gbm_res_GI1_fda_approved$pert[!lincs_tfl_gbm_res_GI1_fda_approved$clinical_trial_GBM], lincs_limma_gbm_res_GI1_fda_approved$pert[!lincs_limma_gbm_res_GI1_fda_approved$GBM_CT], lincs_deseq2_gbm_res_GI1_fda_approved$pert[!lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT])
non_fda_candidate_list<- unique(non_fda_candidate_list)
primary_gbm_res_sub <-prism_gbm_cell_lines[ prism_gbm_cell_lines$Var2 %in% non_fda_candidate_list,]
#write.csv(primary_gbm_res_sub , file= "~/output/TF_L_GBM/220808_prism_tfl_candidates_gbm.csv" )
drug_list <- as.vector(unique(primary_gbm_res_sub$Var2))
drug_fraction<- c()
for (i in 1:length(drug_list)){
  cells <-primary_gbm_res_sub$Sensitive[primary_gbm_res_sub$Var2 %in% drug_list[i]]
  cells<- cells[!is.na(cells)]
  if( length(cells) < 12){
    print(drug_list[i])
  }
  #x<- table(cells)
  drug_fraction[i] <- length(cells[cells== TRUE])/ length(cells)
}
test <- ifelse(drug_fraction > 0.75, TRUE, FALSE)
drug_senstive_precentage_all_drugs <- data.frame(drug_fraction, drug_list, test)

drug_senstive_precentage_all_drugs$drug_list<- factor(drug_senstive_precentage_all_drugs$drug_list, levels= unique(drug_senstive_precentage_all_drugs$drug_list[order(drug_senstive_precentage_all_drugs$drug_fraction)]))
ggplot(drug_senstive_precentage_all_drugs, aes( drug_fraction, drug_list, fill= test ))+ geom_bar(stat="identity",  color= "black") + scale_fill_viridis_d() + ylab("Drug Candidates") + xlab("Fraction of Sensitive Cell Lines") +labs(fill = "75% of Cell line are Sensitive")

#ggsave("~/output/TF_L_GBM/220808_ggplot_TFL_candidates_prism_screen1_sensitive_cell_lines.png")
  tfl_drugs1 <-lincs_tfl_gbm_res_GI1_fda_approved$pert[!lincs_tfl_gbm_res_GI1_fda_approved$clinical_trial_GBM] 
  deseq2_drugs1<- lincs_deseq2_gbm_res_GI1_fda_approved$pert[!lincs_deseq2_gbm_res_GI1_fda_approved$GBM_CT]
  limma_drugs1 <- lincs_limma_gbm_res_GI1_fda_approved$pert[!lincs_limma_gbm_res_GI1_fda_approved$GBM_CT]

  tfl_drugs <- grepl(paste(tfl_drugs1 ,collapse="|"), drug_senstive_precentage_all_drugs$drug_list) 
  deseq2_drugs<- grepl(paste(deseq2_drugs1,collapse="|"), drug_senstive_precentage_all_drugs$drug_list)
  limma_drugs <- grepl(paste(limma_drugs1 ,collapse="|"), drug_senstive_precentage_all_drugs$drug_list)
  
  #shared by all methods or two methods
  all_drugs <- tfl_drugs & deseq2_drugs & limma_drugs
  
  deseq_tfl <- tfl_drugs & deseq2_drugs 
  deseq_tfl<- ifelse(all_drugs ==TRUE, FALSE, deseq_tfl)
  
  deseq_limma <- limma_drugs & deseq2_drugs 
  deseq_limma <- ifelse(all_drugs ==TRUE, FALSE, deseq_limma )
  
  limma_tfl<- limma_drugs & tfl_drugs 
  limma_tfl <- ifelse(all_drugs ==TRUE, FALSE, limma_tfl )
  
  
  #only indivudal methods
  tfl<- setdiff(tfl_drugs1,  c(limma_drugs1, deseq2_drugs1) )
  tfl<- grepl(paste(tfl,collapse="|"), drug_senstive_precentage_all_drugs$drug_list)
  
  limma<- setdiff(limma_drugs1,  c(tfl_drugs1, deseq2_drugs1) )
  limma<- grepl(paste(limma,collapse="|"), drug_senstive_precentage_all_drugs$drug_list)
  
  deseq<- setdiff(deseq2_drugs1,  c(tfl_drugs1,limma_drugs1) )
  deseq<- grepl(paste(deseq,collapse="|"), drug_senstive_precentage_all_drugs$drug_list)
  
  
  #set groups for each node
  #this will match node color
  group <- rep(NA, length(tfl))
  group <- ifelse(tfl, "Transfer Learning", group)
  group <- ifelse(limma_tfl, "limma and Transfer Learning", group)
  group<- ifelse(limma, "limma", group)
  group<- ifelse(deseq_limma, "DESeq2 and limma", group)
  group<- ifelse(deseq, "DESeq2", group)
  group<- ifelse(deseq_tfl, "DESeq2 and Transfer Learning", group)
  group<- ifelse(all_drugs, "All Methods", group)
  
drug_senstive_precentage_all_drugs$method<- factor(group, levels= c("All Methods","DESeq2 and Transfer Learning",  "DESeq2","DESeq2 and limma", "limma", "limma and Transfer Learning", "Transfer Learning" ))

ggplot(drug_senstive_precentage_all_drugs, aes( drug_fraction, drug_list, fill= method ))+ geom_bar(stat="identity",  color= "black") + scale_fill_viridis_d() + ylab("Drug Candidates") + xlab("Fraction of Sensitive Cell Lines") +labs(fill = "Method(s)")  +scale_fill_manual(values=c("All Methods"="#FDE725FF","DESeq2 and Transfer Learning" ="#8FD744FF", "DESeq2"="#35B779FF", "DESeq2 and limma"= "#21908CFF", "limma"= "#31688EFF", "limma and Transfer Learning"= "#443A83FF", "Transfer Learning" = "#440154FF"))+ geom_vline(xintercept = 0.75, size= 5) + theme(text = element_text(size = 60,  face="bold"))
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

 ggsave("~/output/gbm_plots/221117_top_candidate_PRISM_plot.png", width = 49, height= 25) 
gbm_drugs <- unique(c(lincs_limma_gbm_res_GI1_fda_approved$pert, lincs_deseq2_gbm_res_GI1_fda_approved$pert, lincs_tfl_gbm_res_GI1_fda_approved$pert))
  limma_rank <-rep(NA, length(gbm_drugs))
  deseq2_rank<-rep(NA, length(gbm_drugs))
  tfl_rank<-rep(NA, length(gbm_drugs))
for (i in 1:length(gbm_drugs)){
  if(length(grep(gbm_drugs[i], lincs_limma_gbm_res_GI1_fda_approved$pert)) == 1){
    limma_rank[i] <-grep(gbm_drugs[i], lincs_limma_gbm_res_GI1_fda_approved$pert)
  }else{
    limma_rank[i] <- NA
  }
  if(length(grep(gbm_drugs[i], lincs_deseq2_gbm_res_GI1_fda_approved$pert)) == 1){
    deseq2_rank[i] <-grep(gbm_drugs[i], lincs_deseq2_gbm_res_GI1_fda_approved$pert)
  }else{
    deseq2_rank[i] <-NA
  }
  if( length(grep(gbm_drugs[i], lincs_tfl_gbm_res_GI1_fda_approved$pert)) == 1){
    tfl_rank[i] <-grep(gbm_drugs[i], lincs_tfl_gbm_res_GI1_fda_approved$pert)
  }else{
    tfl_rank[i] <-NA
  }
}
method_ranks<- cbind(limma_rank, deseq2_rank, tfl_rank)
drug_ranks<- rowMedians(method_ranks,na.rm= TRUE)
  tfl_drugs1 <-lincs_tfl_gbm_res_GI1_fda_approved$pert
  deseq2_drugs1<- lincs_deseq2_gbm_res_GI1_fda_approved$pert
  limma_drugs1 <- lincs_limma_gbm_res_GI1_fda_approved$pert

  tfl_drugs <- grepl(paste(tfl_drugs1 ,collapse="|"), gbm_drugs) 
  deseq2_drugs<- grepl(paste(deseq2_drugs1,collapse="|"), gbm_drugs)
  limma_drugs <- grepl(paste(limma_drugs1 ,collapse="|"), gbm_drugs)
  
  #shared by all methods or two methods
  all_drugs <- tfl_drugs & deseq2_drugs & limma_drugs
  
  deseq_tfl <- tfl_drugs & deseq2_drugs 
  deseq_tfl<- ifelse(all_drugs ==TRUE, FALSE, deseq_tfl)
  
  deseq_limma <- limma_drugs & deseq2_drugs 
  deseq_limma <- ifelse(all_drugs ==TRUE, FALSE, deseq_limma )
  
  limma_tfl<- limma_drugs & tfl_drugs 
  limma_tfl <- ifelse(all_drugs ==TRUE, FALSE, limma_tfl )
  
  
  #only indivudal methods
  tfl<- setdiff(tfl_drugs1,  c(limma_drugs1, deseq2_drugs1) )
  tfl<- grepl(paste(tfl,collapse="|"), gbm_drugs)
  
  limma<- setdiff(limma_drugs1,  c(tfl_drugs1, deseq2_drugs1) )
  limma<- grepl(paste(limma,collapse="|"), gbm_drugs)
  
  deseq<- setdiff(deseq2_drugs1,  c(tfl_drugs1,limma_drugs1) )
  deseq<- grepl(paste(deseq,collapse="|"), gbm_drugs)
  
  
  #set groups for each node
  #this will match node color
  group <- rep(NA, length(tfl))
  group <- ifelse(tfl, "Transfer Learning", group)
  group <- ifelse(limma_tfl, "limma and Transfer Learning", group)
  group<- ifelse(limma, "limma", group)
  group<- ifelse(deseq_limma, "DESeq2 and limma", group)
  group<- ifelse(deseq, "DESeq2", group)
  group<- ifelse(deseq_tfl, "DESeq2 and Transfer Learning", group)
  group<- ifelse(all_drugs, "All Methods", group)
clinical_trial_GBM <- read_csv("~/data/SearchResults.csv")
all_cand<- data.frame(gbm_drugs, drug_ranks, group)

clinical_trial_check

all_cand$ct <- clinical_trial_check(as.character(all_cand$gbm_drugs), clinical_trial_GBM)

get targets

colnames(lincs_deseq2_gbm_res_GI1_fda_approved)<- NULL
colnames(lincs_tfl_gbm_res_GI1_fda_approved)<- NULL
rownames(lincs_deseq2_gbm_res_GI1_fda_approved)<- NULL
rownames(lincs_tfl_gbm_res_GI1_fda_approved)<- NULL
all_gbm_drugs<- unlist(c(lincs_limma_gbm_res_GI1_fda_approved$pert, lincs_deseq2_gbm_res_GI1_fda_approved[,2], lincs_tfl_gbm_res_GI1_fda_approved[,2]))
gbm_drug_target<- unlist(c(lincs_limma_gbm_res_GI1_fda_approved[,14], lincs_deseq2_gbm_res_GI1_fda_approved[,14], lincs_tfl_gbm_res_GI1_fda_approved[,14]))

gbm_targets<- as.data.frame(cbind(all_gbm_drugs, gbm_drug_target))
gbm_targets<- unique(gbm_targets)
identical(gbm_targets$all_gbm_drugs, all_cand$gbm_drugs)
## [1] TRUE
all_cand$t_gn_sym<- gbm_targets$gbm_drug_target
colnames(all_cand)[1]<- "pert"
all_cand<- all_cand[order(all_cand$drug_ranks),]
drug_moa_target_plotting_all_info(all_cand, lincs_commpound_info=lincs_commpound_info, "all", "gbm", file_path_header= "~/output/candidate_moa_target_plots/221128" )
##    INDEX         x                               stratum                 drug
## 1      1 cmap_name                            vorinostat           vorinostat
## 2      2 cmap_name                            lonafarnib           lonafarnib
## 3      3 cmap_name                             rucaparib            rucaparib
## 4      4 cmap_name                            dabrafenib           dabrafenib
## 5      5 cmap_name                          rivastigmine         rivastigmine
## 6      6 cmap_name                            nimodipine           nimodipine
## 7      7 cmap_name                            apremilast           apremilast
## 8      8 cmap_name                           bivalirudin          bivalirudin
## 9      9 cmap_name                             lapatinib            lapatinib
## 10    10 cmap_name                             lapatinib            lapatinib
## 11    11 cmap_name                           saxagliptin          saxagliptin
## 12    12 cmap_name                           pamidronate          pamidronate
## 13    13 cmap_name                            crizotinib           crizotinib
## 14    14 cmap_name                        pentoxifylline       pentoxifylline
## 15    15 cmap_name                            gabapentin           gabapentin
## 16    16 cmap_name                              ixazomib             ixazomib
## 17    17 cmap_name                             icosapent            icosapent
## 18    18 cmap_name                          atorvastatin         atorvastatin
## 19    19 cmap_name                           galantamine          galantamine
## 20    20 cmap_name                              imatinib             imatinib
## 21    21 cmap_name                              imatinib             imatinib
## 22    22 cmap_name                              imatinib             imatinib
## 23    23 cmap_name                            nifedipine           nifedipine
## 24    24 cmap_name                          thioridazine         thioridazine
## 25    25 cmap_name                             ibrutinib            ibrutinib
## 26    26 cmap_name                        spironolactone       spironolactone
## 27    27 cmap_name                            amiodarone           amiodarone
## 28    28 cmap_name                            vardenafil           vardenafil
## 29    29 cmap_name                           floxuridine          floxuridine
## 30    30 cmap_name                         sulfasalazine        sulfasalazine
## 31    31 cmap_name                         sulfasalazine        sulfasalazine
## 32    32 cmap_name                             erlotinib            erlotinib
## 33    33 cmap_name                           simvastatin          simvastatin
## 34    34 cmap_name                             maraviroc            maraviroc
## 35    35 cmap_name                             dasatinib            dasatinib
## 36    36 cmap_name                             dasatinib            dasatinib
## 37    37 cmap_name                             dasatinib            dasatinib
## 38    38 cmap_name                             dasatinib            dasatinib
## 39    39 cmap_name                             dasatinib            dasatinib
## 40    40 cmap_name                             dasatinib            dasatinib
## 41    41 cmap_name                  isosorbide-dinitrate isosorbide-dinitrate
## 42    42 cmap_name                  isosorbide-dinitrate isosorbide-dinitrate
## 43     1       moa                        HDAC inhibitor           vorinostat
## 44     2       moa         Farnesyltransferase inhibitor           lonafarnib
## 45     3       moa                        PARP inhibitor            rucaparib
## 46     4       moa                         RAF inhibitor           dabrafenib
## 47     5       moa        Acetylcholinesterase inhibitor         rivastigmine
## 48     6       moa               Calcium channel blocker           nimodipine
## 49     7       moa           Phosphodiesterase inhibitor           apremilast
## 50     8       moa                    Thrombin inhibitor          bivalirudin
## 51     9       moa                        EGFR inhibitor            lapatinib
## 52    10       moa                       ErbB2 inhibitor            lapatinib
## 53    11       moa        Dipeptidyl peptidase inhibitor          saxagliptin
## 54    12       moa             Bone resorption inhibitor          pamidronate
## 55    13       moa                         ALK inhibitor           crizotinib
## 56    14       moa           Phosphodiesterase inhibitor       pentoxifylline
## 57    15       moa               Calcium channel blocker           gabapentin
## 58    16       moa                  Proteasome inhibitor             ixazomib
## 59    17       moa        Platelet aggregation inhibitor            icosapent
## 60    18       moa                       HMGCR inhibitor         atorvastatin
## 61    19       moa        Acetylcholinesterase inhibitor          galantamine
## 62    20       moa                         KIT inhibitor             imatinib
## 63    21       moa                       PDGFR inhibitor             imatinib
## 64    22       moa                  Abl kinase inhibitor             imatinib
## 65    23       moa               Calcium channel blocker           nifedipine
## 66    24       moa          Dopamine receptor antagonist         thioridazine
## 67    25       moa                         BTK inhibitor            ibrutinib
## 68    26       moa Mineralocorticoid receptor antagonist       spironolactone
## 69    27       moa             Potassium channel blocker           amiodarone
## 70    28       moa           Phosphodiesterase inhibitor           vardenafil
## 72    30       moa                   Anti-rheumatic drug        sulfasalazine
## 73    31       moa                NFKB pathway inhibitor        sulfasalazine
## 74    32       moa                        EGFR inhibitor            erlotinib
## 75    33       moa                       HMGCR inhibitor          simvastatin
## 76    34       moa               CCK receptor antagonist            maraviroc
## 77    35       moa                         KIT inhibitor            dasatinib
## 78    36       moa                         Src inhibitor            dasatinib
## 79    37       moa                       PDGFR inhibitor            dasatinib
## 80    38       moa                      Ephrin inhibitor            dasatinib
## 81    39       moa                  Abl kinase inhibitor            dasatinib
## 82    40       moa             Tyrosine kinase inhibitor            dasatinib
## 83    41       moa                Nitric oxide stimulant isosorbide-dinitrate
## 84    42       moa           Guanylate cyclase stimulant isosorbide-dinitrate
##       ct                       method
## 1  FALSE  limma and Transfer Learning
## 2  FALSE                        limma
## 3  FALSE            Transfer Learning
## 4  FALSE                        limma
## 5  FALSE             DESeq2 and limma
## 6  FALSE            Transfer Learning
## 7  FALSE                        limma
## 8  FALSE                       DESeq2
## 9  FALSE            Transfer Learning
## 10 FALSE            Transfer Learning
## 11 FALSE             DESeq2 and limma
## 12 FALSE                  All Methods
## 13 FALSE                        limma
## 14 FALSE            Transfer Learning
## 15 FALSE            Transfer Learning
## 16 FALSE             DESeq2 and limma
## 17 FALSE             DESeq2 and limma
## 18 FALSE            Transfer Learning
## 19 FALSE            Transfer Learning
## 20 FALSE                  All Methods
## 21 FALSE                  All Methods
## 22 FALSE                  All Methods
## 23 FALSE DESeq2 and Transfer Learning
## 24 FALSE                  All Methods
## 25 FALSE                       DESeq2
## 26 FALSE            Transfer Learning
## 27 FALSE             DESeq2 and limma
## 28 FALSE                       DESeq2
## 29 FALSE            Transfer Learning
## 30 FALSE                        limma
## 31 FALSE                        limma
## 32 FALSE                       DESeq2
## 33 FALSE            Transfer Learning
## 34 FALSE                        limma
## 35 FALSE            Transfer Learning
## 36 FALSE            Transfer Learning
## 37 FALSE            Transfer Learning
## 38 FALSE            Transfer Learning
## 39 FALSE            Transfer Learning
## 40 FALSE            Transfer Learning
## 41 FALSE            Transfer Learning
## 42 FALSE            Transfer Learning
## 43 FALSE  limma and Transfer Learning
## 44 FALSE                        limma
## 45 FALSE            Transfer Learning
## 46 FALSE                        limma
## 47 FALSE             DESeq2 and limma
## 48 FALSE            Transfer Learning
## 49 FALSE                        limma
## 50 FALSE                       DESeq2
## 51 FALSE            Transfer Learning
## 52 FALSE            Transfer Learning
## 53 FALSE             DESeq2 and limma
## 54 FALSE                  All Methods
## 55 FALSE                        limma
## 56 FALSE            Transfer Learning
## 57 FALSE            Transfer Learning
## 58 FALSE             DESeq2 and limma
## 59 FALSE             DESeq2 and limma
## 60 FALSE            Transfer Learning
## 61 FALSE            Transfer Learning
## 62 FALSE                  All Methods
## 63 FALSE                  All Methods
## 64 FALSE                  All Methods
## 65 FALSE DESeq2 and Transfer Learning
## 66 FALSE                  All Methods
## 67 FALSE                       DESeq2
## 68 FALSE            Transfer Learning
## 69 FALSE             DESeq2 and limma
## 70 FALSE                       DESeq2
## 72 FALSE                        limma
## 73 FALSE                        limma
## 74 FALSE                       DESeq2
## 75 FALSE            Transfer Learning
## 76 FALSE                        limma
## 77 FALSE            Transfer Learning
## 78 FALSE            Transfer Learning
## 79 FALSE            Transfer Learning
## 80 FALSE            Transfer Learning
## 81 FALSE            Transfer Learning
## 82 FALSE            Transfer Learning
## 83 FALSE            Transfer Learning
## 84 FALSE            Transfer Learning
## Warning: Removed 31 rows containing missing values (geom_text_repel).
## Warning: Removed 32 rows containing missing values (geom_text_repel).

update the target plots

deseq_results <- read.csv("~/output/deseq2_gbm/220927_deseq2_gbm_normal_gtx_res.csv", sep=",")
tpm_df <- readRDS("~/output/recount3_gbm_gtex_tpm_df.rds")
metadata <- readRDS("~/output/recount3_gbm_gtex_metadata.rds")
library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
human_projects<- available_projects()
## 2022-12-05 21:19:48 caching file sra.recount_project.MD.gz.
## 2022-12-05 21:19:49 caching file gtex.recount_project.MD.gz.
## 2022-12-05 21:19:49 caching file tcga.recount_project.MD.gz.
#get a random project data
test<- create_rse(human_projects[(human_projects$project == "SRP118922"),])
## 2022-12-05 21:19:52 downloading and reading the metadata.
## 2022-12-05 21:19:53 caching file sra.sra.SRP118922.MD.gz.
## 2022-12-05 21:19:53 caching file sra.recount_project.SRP118922.MD.gz.
## 2022-12-05 21:19:54 caching file sra.recount_qc.SRP118922.MD.gz.
## 2022-12-05 21:19:55 caching file sra.recount_seq_qc.SRP118922.MD.gz.
## 2022-12-05 21:19:55 caching file sra.recount_pred.SRP118922.MD.gz.
## 2022-12-05 21:19:56 downloading and reading the feature information.
## 2022-12-05 21:19:56 caching file human.gene_sums.G026.gtf.gz.
## 2022-12-05 21:19:57 downloading and reading the counts: 35 samples across 63856 features.
## 2022-12-05 21:19:57 caching file sra.gene_sums.SRP118922.G026.gz.
## 2022-12-05 21:19:58 construcing the RangedSummarizedExperiment (rse) object.
gene_info <- test@rowRanges
rm(test)
dim(as.data.frame(gene_info))
## [1] 63856    15
identical(gene_info$gene_id, rownames(deseq_results))
## [1] TRUE
deseq_results$Symbol<- gene_info$gene_name
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
drug_target_analysis( "pamidronate" , c("FDPS"), as.data.frame(tpm_df) , metadata$status, deseq_results, "~/output/gbm_plots/")
drug_target_analysis( "icosapent" , c("ACSL3", "ACSL4", "FADS1", "FFAR1", "PPARD", "PPARG", "PTGS1", "PTGS2", "SLC8A1", "TRPV1"), as.data.frame(tpm_df) , metadata$status, deseq_results, "~/output/gbm_plots/")

drug_target_analysis( "nimodipine" , c("AHR", "CACNA1C", "CACNA1D", "CACNA1F", "CACNA1S", "CACNB1","CACNB2", "CACNB3", "CACNB4", "CFTR", "NR3C2" ), as.data.frame(tpm_df) , metadata$status, deseq_results, "~/output/gbm_plots/")

drug_target_analysis( "saxagliptin" , c("DPP4" ), as.data.frame(tpm_df) , metadata$status, deseq_results, "~/output/gbm_plots/")

Save Data

Save Figures

NA

END

Location of final scripts:
/script

Location of data produced:
na

Dates when operations were done:
220901

Versions

sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0                recount3_1.4.0             
##  [3] SummarizedExperiment_1.24.0 Biobase_2.54.0             
##  [5] GenomicRanges_1.46.1        GenomeInfoDb_1.30.1        
##  [7] IRanges_2.28.0              S4Vectors_0.32.4           
##  [9] BiocGenerics_0.40.0         MatrixGenerics_1.6.0       
## [11] matrixStats_0.62.0          stringr_1.4.1              
## [13] ggalluvial_0.12.3           ComplexHeatmap_2.10.0      
## [15] circlize_0.4.15             viridis_0.6.2              
## [17] viridisLite_0.4.1           cowplot_1.1.1              
## [19] ggplot2_3.3.6               tidyr_1.2.1                
## [21] readr_2.1.2                
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-3         ggsignif_0.6.3           rjson_0.2.21            
##   [4] ellipsis_0.3.2           XVector_0.34.0           GlobalOptions_0.1.2     
##   [7] clue_0.3-61              rstudioapi_0.13          farver_2.1.1            
##  [10] ggrepel_0.9.1            bit64_4.0.5              fansi_1.0.3             
##  [13] codetools_0.2-18         R.methodsS3_1.8.2        doParallel_1.0.17       
##  [16] cachem_1.0.6             knitr_1.40               jsonlite_1.8.0          
##  [19] Rsamtools_2.10.0         broom_1.0.1              cluster_2.1.2           
##  [22] dbplyr_2.2.1             png_0.1-7                R.oo_1.25.0             
##  [25] compiler_4.1.3           httr_1.4.4               backports_1.4.1         
##  [28] assertthat_0.2.1         Matrix_1.5-1             fastmap_1.1.0           
##  [31] cli_3.4.1                htmltools_0.5.3          tools_4.1.3             
##  [34] gtable_0.3.1             glue_1.6.2               GenomeInfoDbData_1.2.7  
##  [37] dplyr_1.0.10             rappdirs_0.3.3           Rcpp_1.0.9              
##  [40] carData_3.0-5            jquerylib_0.1.4          Biostrings_2.62.0       
##  [43] vctrs_0.4.2              rtracklayer_1.54.0       iterators_1.0.14        
##  [46] xfun_0.33                lifecycle_1.0.2          restfulr_0.0.15         
##  [49] rstatix_0.7.0            XML_3.99-0.10            zlibbioc_1.40.0         
##  [52] scales_1.2.1             vroom_1.5.7              ragg_1.2.2              
##  [55] hms_1.1.2                parallel_4.1.3           RColorBrewer_1.1-3      
##  [58] yaml_2.3.5               curl_4.3.2               memoise_2.0.1           
##  [61] gridExtra_2.3            sass_0.4.2               stringi_1.7.8           
##  [64] RSQLite_2.2.17           highr_0.9                BiocIO_1.4.0            
##  [67] foreach_1.5.2            filelock_1.0.2           BiocParallel_1.28.3     
##  [70] shape_1.4.6              rlang_1.0.6              pkgconfig_2.0.3         
##  [73] systemfonts_1.0.4        bitops_1.0-7             evaluate_0.16           
##  [76] lattice_0.20-45          purrr_0.3.4              GenomicAlignments_1.30.0
##  [79] labeling_0.4.2           bit_4.0.4                tidyselect_1.1.2        
##  [82] magrittr_2.0.3           R6_2.5.1                 generics_0.1.3          
##  [85] DelayedArray_0.20.0      DBI_1.1.3                pillar_1.8.1            
##  [88] withr_2.5.0              abind_1.4-5              RCurl_1.98-1.8          
##  [91] tibble_3.1.8             car_3.1-0                crayon_1.5.2            
##  [94] utf8_1.2.2               BiocFileCache_2.2.1      tzdb_0.3.0              
##  [97] rmarkdown_2.16           GetoptLong_1.0.5         data.table_1.14.2       
## [100] blob_1.2.3               digest_0.6.29            R.utils_2.12.0          
## [103] textshaping_0.3.6        munsell_0.5.0            bslib_0.4.0             
## [106] sessioninfo_1.2.2